clear all
set more off

local path = "...folder path"
cd "`path'"

///Prepare data for sintering 
**************************************************
*Rename variables and create unique equipment IDs*
**************************************************
/*2009 & 2010 January to December*/
forvalues j = 2009(1)2010{
forvalues i=1(1)12{

clear all
import excel "...folder name\sinter`j'.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s 
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size /*for each month, we create an equipment id; and we extract information on equipment size and number from the variable fid*/
gen firmname = fid_adj
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta /*name.dta contains a variable "fime" for abbreviated firm name; by merging with name.dta, we shall use the short firm names consistently across different time periods.*/
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id`j'`i'
rename size size`j'`i'
rename number number`j'`i'

/*generate a new id: plant*//*Note that we have to reconcile the project names (fid_adj) across time in order to create a panel later*/
gen plant = firm + fid_adj 

rename fid fid`j'`i'
rename fid_adj fid_adj`j'`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy >= 2
replace tag = "++" if copy == 3
replace plant = plant + tag
drop tag copy

save sinter`j'_`i'_plant.dta, replace
}
}

/*2011 January to October*/
forvalues i=1(1)10{
clear all
import excel "...folder name\sinter2011.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid_adj
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2011`i'
rename size size2011`i'
rename number number2011`i'

/*generate a new id: plant*/
gen plant = firm + fid_adj

rename fid fid2011`i'
rename fid_adj fid_adj2011`i'
/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy >= 2
replace tag = "++" if copy == 3
replace plant = plant + tag
drop tag copy

save sinter2011_`i'_plant.dta, replace
}

************************
*Create aggregate input*
************************
/*2009/2010 January-December*/
forvalues j = 2009(1)2010{
     forvalues i = 1(1)12{
use sinter`j'_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size`j'`i'
foreach var of varlist fuel`j'`i' std_coal`j'`i' productivity`j'`i' energy`j'`i' return`j'`i' utilization`j'`i' hourly`j'`i' calendar`j'`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size`j'`i' == "0" /*drop firm level information*/

*Split information for the set of equipment stacked together; for example, *2 means the orgininal observation contain aggregate information for two identical machines
expand number`j'`i' 
*adjust data
replace production`j'`i' = production`j'`i'/number`j'`i' if number`j'`i' > 0 & !missing(number`j'`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup) 
replace plant = plant + dup_st 
drop dup*

gen tfuel`j'`i' = fuel`j'`i'*production`j'`i'
gen tfuel_std`j'`i' = std_coal`j'`i'*production`j'`i'
gen tenergy`j'`i' = energy`j'`i'*production`j'`i'
gen labor`j'`i' = production`j'`i'/productivity`j'`i'
gen tore`j'`i' = production`j'`i'/(1-return`j'`i'/100)
save sinter`j'_`i'_plant.dta, replace
}
}

/*2011 January-October*/
forvalues i = 1(1)10{
use sinter2011_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size2011`i'
foreach var of varlist fuel2011`i' std_coal2011`i' productivity2011`i' energy2011`i' return2011`i' utilization2011`i' hourly2011`i' calendar2011`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size2011`i' == "0" /*drop firm level information*/

*generate duplicates
expand number2011`i'
*adjust data
replace production2011`i' = production2011`i'/number2011`i' if number2011`i' > 0 & !missing(number2011`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup*

gen tfuel2011`i' = fuel2011`i'*production2011`i'
gen tfuel_std2011`i' = std_coal2011`i'*production2011`i'
gen tenergy2011`i' = energy2011`i'*production2011`i'
gen labor2011`i' = production2011`i'/productivity2011`i'
gen tore2011`i' = production2011`i'/(1-return2011`i'/100)
save sinter2011_`i'_plant.dta, replace
}

****************************************************************
*Merge dataset by plant; transform 0901-0905 into monthly level*
****************************************************************
/*Note that for these time periods, accumulated data are reported instead from 2009 January up to that specific time period*/

use sinter2009_1_plant.dta, clear
forvalues i = 2(1)5{
merge 1:1 plant using sinter2009_`i'_plant.dta
sort _merge plant
drop _merge
}

forvalues i = 1(1)5{
gen m2009`i' = production2009`i'/utilization2009`i'
gen t2009`i' = production2009`i'/hourly2009`i'
gen k2009`i' = 100*production2009`i'/calendar2009`i'
}

/*replace missing value with 0 because in the ealier process treat null as a separate group!*/
foreach x in tfuel2009 tfuel_std2009 tenergy2009 labor2009 tore2009{
forvalues i = 1(1)5{
replace `x'`i' = 0 if `x'`i' == .
}
}

forvalues i = 1(1)4{
local j = 6 - `i'
local k = `j' - 1
foreach x in production2009 tfuel2009 tfuel_std2009 tenergy2009 tore2009 m2009 t2009 k2009{
replace `x'`j' = `x'`j' - `x'`k'
}
}

forvalues i = 2(1)5{
replace energy2009`i' = tenergy2009`i'/production2009`i'
replace productivity2009`i' = production2009`i'/labor2009`i'
drop m2009`i' t2009`i' k2009`i'
}


forvalues i = 6(1)12{
merge 1:1 plant using sinter2009_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)12{
merge 1:1 plant using sinter2010_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)10{
merge 1:1 plant using sinter2011_`i'_plant.dta
sort plant
drop _merge
}

keep plant firm return* production* grade* fuel* std_coal* energy* productivity* utilization* hourly* calendar* tfuel* tfuel_std* tore* tenergy* labor* number* size*
drop grade_stability*
reshape long return production grade fuel std_coal energy productivity utilization hourly calendar tfuel tfuel_std tore tenergy labor number size, i(plant) j(t)


/*add owner region and birth year*/
merge m:1 firm using owner.dta
drop if _merge == 2
drop _merge

/*generate time variable: monthly*/
gen month = 1 
gen year = 2009

forvalues i = 2009(1)2011{
forvalues j = 1(1)10{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

forvalues i = 2009(1)2010{
forvalues j = 11(1)12{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

gen time = ym(year,month)
format time %tm

encode plant, gen(id)
xtset id time, monthly

replace size = "65" if size == "30-100"
replace size = "200" if size == ">100"
destring size, force replace

save sinter_plant_pf.dta, replace


///Prepare data for pig-iron making
**************************************************
*Rename variables and create unique equipment IDs*
**************************************************
/*2009 & 2010 January to December*/
forvalues j = 2009(1)2010{
forvalues i=1(1)12{

clear all
import excel "...folder name\iron`j'.xlsx", sheet("`i'") firstrow

format fid %-55s
replace fid = subinstr(fid," ","",.)
gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size/*for each month, we create an equipment id; and we extract information on equipment size and number from the variable fid*/
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta /*name.dta contains a variable "fime" for abbreviated firm name; by merging with name.dta, we shall use the short firm names consistently across different time periods.*/
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2009`i'
rename size size2009`i'
rename number number2009`i'

/*generate a new id: plant*//*Note that we have to reconcile the project names (fid_adj) across time in order to create a panel later*/
gen scale = string(size2009`i')
gen plant = firm + fid_adj
rename fid fid2009`i'
rename fid_adj fid_adj2009`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop tag copy scale

save iron2009_`i'_plant.dta, replace
}
}

/*2011 January to October*/
forvalues i=1(1)10{
clear all
import excel "...folder name\iron2011.xlsx", sheet("`i'") firstrow

format fid %-55s
replace fid = subinstr(fid," ","",.)
gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2011`i'
rename size size2011`i'
rename number number2011`i'

/*generate a new id: plant*/
gen scale = string(size2011`i')
gen plant = firm + fid_adj

rename fid fid2011`i'
rename fid_adj fid_adj2011`i'
/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop tag copy scale

save iron2011_`i'_plant.dta, replace
}


************************
*Create aggregate input*
************************
forvalues j = 2009(1)2010{
     forvalues i = 1(1)12{
use iron`j'_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size`j'`i'
foreach var of varlist ore`j'`i' coke`j'`i' coke_equiv`j'`i' productivity`j'`i' energy`j'`i' grade`j'`i' utilization`j'`i' qualification`j'`i' grade1_`j'`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

/*fix production for pansteel*/
sort firmname size`j'`i'
bysort firmname: egen t_pdt = sum(production`j'`i')
bysort firmname: egen t_size = sum(size`j'`i')
replace production`j'`i' = t_pdt*size`j'`i'/t_size if firm == "攀钢股份" 
drop t_pdt t_size

drop if size`j'`i' == 0 /*drop firm level information*/

*generate duplicates
expand number`j'`i'
*adjust data
replace production`j'`i' = production`j'`i'/number`j'`i' if number`j'`i' > 0 & !missing(number`j'`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup*

gen tore`j'`i' = ore`j'`i'*production`j'`i'
gen tcoke`j'`i' = coke`j'`i'*production`j'`i'
gen tcoke_equiv`j'`i' = coke_equiv`j'`i'*production`j'`i'
gen tore_made`j'`i' = ore_made`j'`i'/100*tore`j'`i'
gen labor`j'`i' = production`j'`i'/productivity`j'`i'
gen tenergy`j'`i' = energy`j'`i'*production`j'`i'
save iron`j'_`i'_plant.dta, replace
}
}

forvalues i = 1(1)10{
use iron2011_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size2011`i'
foreach var of varlist ore2011`i' coke2011`i' coke_equiv2011`i' productivity2011`i' energy2011`i' grade2011`i' utilization2011`i' qualification2011`i' grade1_2011`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

sort firmname size2011`i'
bysort firmname: egen t_pdt = sum(production2011`i')
bysort firmname: egen t_size = sum(size2011`i')
replace production2011`i' = t_pdt*size2011`i'/t_size if firm == "攀钢股份"
drop t_pdt t_size

drop if size2011`i' == 0 /*drop firm level information*/

*generate duplicates
expand number2011`i'
*adjust data
replace production2011`i' = production2011`i'/number2011`i' if number2011`i' > 0 & !missing(number2011`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup*

gen tore2011`i' = ore2011`i'*production2011`i'
gen tcoke2011`i' = coke2011`i'*production2011`i'
gen tcoke_equiv2011`i' = coke_equiv2011`i'*production2011`i'
gen tore_made2011`i' = ore_made2011`i'/100*tore2011`i'
gen labor2011`i' = production2011`i'/productivity2011`i'
gen tenergy2011`i' = energy2011`i'*production2011`i'
save iron2011_`i'_plant.dta, replace
}

****************************************************************
*Merge dataset by plant; transform 0901-0905 into monthly level*
****************************************************************
use iron2009_1_plant.dta, clear
forvalues i = 2(1)5{
merge 1:1 plant using iron2009_`i'_plant.dta
sort _merge plant
drop _merge
}

forvalues i = 1(1)5{
gen m2009`i' = production2009`i'/utilization2009`i'
gen tgrade2009`i' = grade2009`i'*tore2009`i'
}

forvalues i = 1(1)4{
local j = 6 - `i'
local k = `j' - 1
foreach x in production2009 tore2009 tcoke2009 tcoke_equiv2009 tenergy2009 tore_made2009 m2009 tgrade2009{
replace `x'`j' = `x'`j' - `x'`k'
}
}

forvalues i = 2(1)5{
replace energy2009`i' = tenergy2009`i'/production2009`i'
replace productivity2009`i' = production2009`i'/labor2009`i'
*replace utilization9`i' = pdt9`i'/m9`i'
replace ore_made2009`i' = tore_made2009`i'/tore2009`i'
replace grade2009`i' = tgrade2009`i'/tore2009`i'
drop m2009`i' tgrade2009`i'
}

forvalues i = 6(1)12{
merge 1:1 plant using iron2009_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)12{
merge 1:1 plant using iron2010_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)10{
merge 1:1 plant using iron2011_`i'_plant.dta
sort plant
drop _merge
}

keep firm qualification* plant utilization* production* labor* productivity* tenergy* energy* tore* ore* tcoke_equiv* coke_equiv* grade* number* size*
drop ore_made*
reshape long qualification grade1_ utilization production labor productivity tenergy energy tore ore tcoke_equiv coke_equiv grade tcoke tore_made number size, i(plant) j(t)
rename grade1_ grade1

/*add owner region and birth year*/
merge m:1 firm using owner.dta
drop if _merge == 2
drop _merge

/*generate time variable: monthly*/
gen month = 1 
gen year = 2009

forvalues i = 2009(1)2011{
forvalues j = 1(1)10{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

forvalues i = 2009(1)2010{
forvalues j = 11(1)12{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

gen time = ym(year,month)
format time %tm

encode plant, gen(id)
xtset id time, monthly


/*missing values for ore grade*/
gen quality = grade
replace quality = . if quality == 0 | quality >100
replace quality = - quality if quality < 0 
bysort plant year: egen quality1 = mean(quality) 
bysort plant: egen quality2 = mean(quality) 
replace quality = quality1 if quality == . 
replace quality = quality2 if quality == . 
drop quality1 quality2

save iron_plant_pf.dta, replace


///Prepare data for steel making
**************************************************
*Rename variables and create unique equipment IDs*
**************************************************
/*2009 from January to March*/
forvalues i=1(1)3{
clear all
import excel "...folder name\steel2009.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2009`i'
rename size size2009`i'
rename number number2009`i'

/*generate a new id: plant*/
gen scale = string(size2009`i')
gen plant = firm + fid_adj
rename fid fid2009`i'
rename fid_adj fid_adj2009`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop tag copy scale

save steel2009_`i'_plant.dta, replace
}

forvalues i=5(1)12{
clear all
import excel "...folder name\steel2009.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2009`i'
rename size size2009`i'
rename number number2009`i'

/*generate a new id: plant*/
gen scale = string(size2009`i')
gen plant = firm + fid_adj
rename fid fid2009`i'
rename fid_adj fid_adj2009`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop tag copy scale

save steel2009_`i'_plant.dta, replace
}

*------------------------------------------------------------------------------*
/*2010 January to December*/
forvalues i=1(1)12{
clear all
import excel "...folder name\steel2010.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2010`i'
rename size size2010`i'
rename number number2010`i'

/*generate a new id: plant*/
gen scale = string(size2010`i')
gen plant = firm + fid_adj
rename fid fid2010`i'
rename fid_adj fid_adj2010`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop  tag copy scale

save steel2010_`i'_plant.dta, replace
}

/*2011 January to October*/
forvalues i=1(1)10{
clear all
import excel "...folder name\steel2011.xlsx", sheet("`i'") firstrow

gen fid_adj = fid
format fid_adj %-55s
replace fid_adj = subinstr(fid_adj," ","",.)

sort id size
gen firmname = fid
bysort id: replace firmname = firmname[1]
format firmname %-55s
replace firmname = subinstr(firmname," ","",.)

merge m:1 firmname using name.dta
drop if _merge == 2
drop _merge
sort id size
bysort id: egen nb = sum(number) /*get total number of machines*/
replace number = nb if number == . 

drop nb
drop if id == .
rename id id2011`i'
rename size size2011`i'
rename number number2011`i'

/*generate a new id: plant*/
gen scale = string(size2011`i')
gen plant = firm + fid_adj
rename fid fid2011`i'
rename fid_adj fid_adj2011`i'

/*address duplicated plant*/
bysort plant: gen copy = _n 
gen tag = "+" if copy == 2
replace plant = plant + tag
drop tag copy scale

save steel2011_`i'_plant.dta, replace
}

************************
*Create aggregate input*
************************

/*2009 and 2010 from January to March*/
forvalues j = 2009(1)2010{
     forvalues i = 1(1)3{
use steel`j'_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size`j'`i'
foreach var of varlist metal`j'`i' pigiron`j'`i' ironlump`j'`i' scrap`j'`i' other_mat`j'`i' alloy`j'`i' energy`j'`i' oxygen`j'`i' lime`j'`i' utilization`j'`i' calendar`j'`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size`j'`i' == 0 /*drop firm level information*/
*generate duplicates
expand number`j'`i'
*adjust data
replace production`j'`i' = production`j'`i'/number`j'`i' if number`j'`i' > 0 & !missing(number`j'`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup_st
rename dup dup`j'`i'

foreach var of varlist metal`j'`i' pigiron`j'`i' ironlump`j'`i' scrap`j'`i' other_mat`j'`i' alloy`j'`i' energy`j'`i' oxygen`j'`i' lime`j'`i'{
gen t`var' = `var'*production`j'`i'
}
gen labor`j'`i' = production`j'`i'/productivity`j'`i'

save steel`j'_`i'_plant.dta, replace
}
}

/*2010 April*/
use steel2010_4_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size20104
foreach var of varlist metal20104 pigiron20104 ironlump20104 scrap20104 other_mat20104 alloy20104 energy20104 oxygen20104 lime20104 utilization20104 calendar20104{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size20104 == 0 /*drop firm level information*/

*generate duplicates
expand number20104
*adjust data
replace production20104 = production20104/number20104 if number20104 > 0 & !missing(number20104) 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup_st
rename dup dup20104

foreach var of varlist metal20104 pigiron20104 ironlump20104 scrap20104 other_mat20104 alloy20104 energy20104 oxygen20104 lime20104{
gen t`var' = `var'*production20104
}
gen labor20104 = production20104/productivity20104
save steel2010_4_plant.dta, replace

/*2009 and 2010 from May to December*/
forvalues j = 2009(1)2010{
     forvalues i = 5(1)12{
use steel`j'_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size`j'`i'
foreach var of varlist metal`j'`i' pigiron`j'`i' ironlump`j'`i' scrap`j'`i' other_mat`j'`i' alloy`j'`i' energy`j'`i' oxygen`j'`i' lime`j'`i' utilization`j'`i' calendar`j'`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size`j'`i' == 0 /*drop firm level information*/

*generate duplicates
expand number`j'`i'
*adjust data
replace production`j'`i' = production`j'`i'/number`j'`i' if number`j'`i' > 0 & !missing(number`j'`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup_st
rename dup dup`j'`i'

foreach var of varlist metal`j'`i' pigiron`j'`i' ironlump`j'`i' scrap`j'`i' other_mat`j'`i' alloy`j'`i' energy`j'`i' oxygen`j'`i' lime`j'`i'{
gen t`var' = `var'*production`j'`i'
}
gen labor`j'`i' = production`j'`i'/productivity`j'`i'

save steel`j'_`i'_plant.dta, replace
}
}

forvalues i = 1(1)10{
use steel2011_`i'_plant.dta, clear
/*replace missing values with firm-level info*/
sort firmname size2011`i'
foreach var of varlist metal2011`i' pigiron2011`i' ironlump2011`i' scrap2011`i' other_mat2011`i' alloy2011`i' energy2011`i' oxygen2011`i' lime2011`i' utilization2011`i' calendar2011`i'{
bysort firmname: replace `var' = `var'[1] if `var' == . | `var' == 0
}

drop if size2011`i' == 0 /*drop firm level information*/

*generate duplicates
expand number2011`i'
*adjust data
replace production2011`i' = production2011`i'/number2011`i' if number2011`i' > 0 & !missing(number2011`i') 
*assign numerical identifier
bysort plant: gen dup = _n
gen dup_st = ""
replace dup_st = string(dup)
replace plant = plant + dup_st
drop dup_st
rename dup dup2011`i'

foreach var of varlist metal2011`i' pigiron2011`i' ironlump2011`i' scrap2011`i' other_mat2011`i' alloy2011`i' energy2011`i' oxygen2011`i' lime2011`i'{
gen t`var' = `var'*production2011`i'
}
gen labor2011`i' = production2011`i'/productivity2011`i'

save steel2011_`i'_plant.dta, replace
}

****************************************************************
*Merge dataset by plant; transform 0901-0905 into monthly level*
****************************************************************
use steel2009_1_plant.dta, clear
merge 1:1 plant using steel2009_2_plant.dta
sort _merge plant
drop _merge
merge 1:1 plant using steel2009_3_plant.dta
sort _merge plant
drop _merge

forvalues i = 1(1)3{
gen m2009`i' = production2009`i'/utilization2009`i'
gen k2009`i' = 100*production2009`i'/calendar2009`i'
}

/*replace missing value with 0*/
foreach x in tenergy2009 labor2009 tmetal2009 tpigiron2009 tironlump2009 tscrap2009 tother_mat2009 talloy2009 tlime2009 toxygen2009{
replace `x'1 = 0 if `x'1 == .
replace `x'2 = 0 if `x'2 == .
replace `x'3 = 0 if `x'3 == .
}

forvalues i = 1(1)2{
local j = 4 - `i'
local k = `j' - 1
foreach x in production2009 tenergy2009 tmetal2009 tpigiron2009 tironlump2009 tscrap2009 tother_mat2009 talloy2009 tlime2009 toxygen2009 m2009 k2009{
replace `x'`j' = `x'`j' - `x'`k'
}
}

forvalues i = 2(1)3{
replace energy2009`i' = tenergy2009`i'/production2009`i'
replace productivity2009`i' = production2009`i'/labor2009`i'
replace metal2009`i' = tmetal2009`i'/production2009`i'
replace pigiron2009`i' = tpigiron2009`i'/production2009`i'
replace ironlump2009`i' = tironlump2009`i'/production2009`i'
replace scrap2009`i' = tscrap2009`i'/production2009`i'
replace other_mat2009`i' = tother_mat2009`i'/production2009`i'
replace alloy2009`i' = talloy2009`i'/production2009`i'
replace lime2009`i' = tlime2009`i'/production2009`i'
replace oxygen2009`i' = toxygen2009`i'/production2009`i'
}

drop m20091-m20093 k20091-k20093 

forvalues i = 6(1)12{
merge 1:1 plant using steel2009_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)12{
merge 1:1 plant using steel2010_`i'_plant.dta
sort plant
drop _merge
}

forvalues i = 1(1)10{
merge 1:1 plant using steel2011_`i'_plant.dta
sort plant
drop _merge
}

drop fid* id*
reshape long time qualification second smolten_treat lance_life lining production tmetal metal tpigiron tironlump tscrap tother_mat talloy tenergy toxygen tlime labor energy productivity utilization calendar steel_mat pigiron oxygen lime other_mat scrap ironlump alloy size number, i(plant) j(t)
rename time alloy_time

/*add owner region and birth year*/
merge m:1 firm using owner.dta
drop if _merge == 2
drop _merge

/*generate time variable: monthly*/
gen month = 1 
gen year = 2009

forvalues i = 1(1)3{
replace month = `i' if t == 2009`i'
}

forvalues i = 6(1)10{
replace month = `i' if t == 2009`i'
}

forvalues i = 2010(1)2011{
forvalues j = 1(1)10{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

forvalues i = 2009(1)2010{
forvalues j = 11(1)12{
replace month = `j' if t == `i'`j'
replace year = `i' if t == `i'`j'
}
}

gen time = ym(year,month)
format time %tm

encode plant, gen(id)
xtset id time, monthly

save steel_plant_pf.dta, replace





